# Default every knitr chunk to echoing its source code in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
# Apply a large penalty to scientific notation so numbers print in fixed notation.
options(scipen = 999)

# Packages used.
package_list <- c("tidyverse", "readxl", "knitr", "mlr", "kableExtra", "corrplot", "ggmosaic", "magrittr", "caret", "randomForest", "reshape", "gridExtra", "GGally", "purrr", "cowplot", "rpart", "rpart.plot", "leaps", "plyr", "grid")
# Attach every package, in the order listed. The order is kept as-is on purpose:
# plyr is attached after tidyverse, and later code calls dplyr::summarise /
# dplyr::mutate with an explicit namespace.
# invisible() stops the list returned by lapply() from being auto-printed.
invisible(lapply(package_list, library, character.only = TRUE))

# Use the black-and-white theme as the default for every ggplot drawn afterwards.
theme_set(theme_bw())

# Reusable colors for attrition.
# Named by the values "No" / "Yes" so manual color/fill scales match by name.
attcol <- c("No"="seagreen3", "Yes"="indianred3")
# Reusable Likert scale colors.
# Named by the scores "1" through "4".
likert <- c("1"="indianred3", "2"="bisque1", "3"="olivedrab3", "4"="springgreen3")
# Distinct colors, borrowed from https://sashat.me/2017/01/11/list-of-20-simple-distinct-colors/
# hardcolors: nine unnamed colors; used as the fill palette in ContVsNomBoxplots().
hardcolors <- c("#E6194B", "#3CB44B", "#FFE119", "#4363D8", "#F58231", "#911EB4", "#42D4F4", "#F032E6", "#BFEF45")
# softcolors: nine unnamed colors; slices of it fill the attrition-percentage barplots.
softcolors <- c("#469990", "#E6BEFF", "#FF496D", "#98F5FF", "#FFFAC8", "#AAFFC3", "#FFD8B1", "#808000", "#9A6324")

# Reused function in tables to clean up outputs on mean.
# Returns the arithmetic mean of `var`, rounded to 2 decimal places.
#   var:   numeric vector to average.
#   na.rm: if TRUE, missing values are dropped before averaging. Defaults to
#          FALSE, so any NA in `var` yields NA (the original behavior).
SimpleMean <- function(var, na.rm = FALSE) {
  round(mean(var, na.rm = na.rm), 2)
}

# Function that produces ggplot boxplots. Takes nominal groups on x-axis. Prefers continuous variable on y-axis.
#   df:   data frame holding both columns.
#   xvar: name (string) of the grouping column; also drives the box fill and legend.
#   yvar: name (string) of the column summarized by each box.
# Returns the ggplot object. Each box is overlaid with a black diamond marking the group mean.
# Fill colors come from the file-level `hardcolors` palette.
# NOTE(review): aes_string() and stat_summary(fun.y=) are deprecated in recent
# ggplot2 releases; they are left untouched here because the replacement
# depends on the installed ggplot2 version -- confirm before migrating.
ContVsNomBoxplots <- function(df, xvar="groups", yvar="continuous") {
  ggplot(df, aes_string(x=xvar, y=yvar, group=xvar, fill=xvar)) +
    geom_boxplot() +
    scale_fill_manual(values=hardcolors) +
    stat_summary(fun.y=mean, color="black", geom="point", shape=5, size=2) +
    # Legend laid out in two columns, filled column by column.
    guides(fill=guide_legend(title.position="top", title.hjust=0.5, ncol=2, byrow=FALSE)) +
    # Group names are read from the legend, so x-axis text and all axis titles are hidden.
    theme(axis.text.x=element_blank(),
          axis.text.y=element_text(size=6),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          legend.key.size=unit(1,"line"),
          legend.position="bottom",
          legend.text=element_text(size=5),
          legend.title=element_text(size=6),
          plot.title = element_text(hjust = 0.5, size=8))
}

# Function that produces ggplot density plots. Takes continuous variable on x-axis. Intended for group as color.
#   df:   data frame holding both columns.
#   xvar: name (string) of the continuous column whose density is drawn.
#   yvar: name (string) of the grouping column mapped to line color (despite
#         the name, it is not placed on the y-axis).
# Returns the ggplot object. Line colors come from the file-level `attcol`
# palette, so the grouping column is expected to take the values "No"/"Yes".
ContDensity <- function(df, xvar="cont", yvar="groups") {
  ggplot(df, aes_string(x=xvar, color=yvar)) +
    # NOTE(review): no fill is mapped, so alpha presumably has no visible effect here -- confirm.
    geom_density(alpha=0.3) +
    scale_color_manual(values=attcol) +
    # The legend belongs to the color aesthetic (the only one mapped), so the
    # title placement must target `color`; targeting `fill` was a no-op.
    guides(color=guide_legend(title.position="top", title.hjust=0.5)) +
    theme(axis.text.x=element_text(size=6),
          axis.text.y=element_blank(),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          legend.key.size=unit(1,"line"),
          legend.position="bottom",
          legend.text=element_text(size=5),
          legend.title=element_text(size=6),
          plot.title = element_text(hjust = 0.5, size=8))
}

# Function that produces ggplot jittered scatter plots. Intended continuous variable on x-axis, discrete variable on y-axis, grouping variable for color.
#   df:       data frame holding all three columns.
#   xvar:     name (string) of the column on the x-axis.
#   yvar:     name (string) of the column on the y-axis.
#   groupvar: name (string) of the column mapped to color.
# Returns the ggplot object: semi-transparent jittered points plus a linear
# fit with a 95% confidence band, drawn per color group.
DiscVsContScatter <- function(df, xvar="cont", yvar="disc", groupvar="group") {
  ggplot(df, aes_string(x=xvar, y=yvar, color=groupvar)) +
    geom_jitter(alpha=0.3, width=0.1) +
    theme(axis.ticks=element_blank(),
          plot.title=element_text(hjust=0.5)) +
    geom_smooth(method="lm", se=TRUE, level=0.95)
}

Introduction

DDSAnalytics provides talent management solutions for Fortune 1000 companies. For these highly influential and competitive American companies, employee retention is critical for productivity, morale, and the bottom line. Therefore, identifying factors that could possibly predict employee turnover and then developing appropriate retention strategies is of great interest.

Our team of data scientists will analyze an existing data set of employee survey responses to identify major factors that contributed to attrition. Additionally, we will also point out other interesting trends that may provide insights for future development and retention strategies.

2. Load/Clean Data

## [1] 1470   35

The original data set provided by DDSAnalytics contains observations on 1,470 employees with information on 35 variables. In other words, the data set comprises a data frame of 1,470 rows and 35 columns. Whether this data comes from one company or multiple is unspecified.

Two versions of the original employee data frame are created. One converts all character variables to factors. The other goes a step further and coerces all variables to numeric. Both are used in different ways during the analysis.

Quick exploratory plots of all 35 variables on all of the data reveal some useful tidbits right off the bat. The histogram shapes alone show some interesting trends.

  • Distance From Home, Percent Salary Increase, Total Working Years, Years At Company, Years In Current Role, Years Since Last Promotion, and Years With Current Manager have similar positively skewed distributions with long right tails. Some of these variables are likely correlated with each other.
    • A spike exists for year 6 in Years In Current Role (and by extension, likely correlated variables Years With Current Manager and Years At Company), potentially indicating a large influx of employees that year who are still with the company.
  • Employee ID has a maximum value above 2000. The data set contains only 1,470 observations, meaning this may be a subset where other employee data has already been removed.
  • Environment Satisfaction, Job Satisfaction, and Relationship Satisfaction have noticeable proportions of dissatisfied employees (where a score of 1 indicates low satisfaction).
  • Job Level for most employees is 1 or 2.
  • Performance Rating, defined on a scale of 1 to 4, only has scores of 3 and 4 represented.

The exploratory bar graphs are similarly informative.

  • Department shows that most employees are in Research & Development, followed by Sales. On a related note, Job Role shows most employees are Sales Executives, Research Scientists, or Laboratory Technicians; Education Field shows most have a background in Life Sciences or Medical.
  • Employee Count is 1 for all employees in the data set because each row corresponds to answers for only one employee.
  • Over 18 is Yes for all employees (including employees who are 18 years old).
  • Standard Hours is 80 for all employees in the data set. Assuming a standard 40-hour workweek, all of the represented companies use bi-weekly paychecks.
##          Age    Attrition    BusTravel    DailyRate   Department 
##            0            0            0            0            0 
##  DistFromHme    Education     EduField     EmpCount        EmpId 
##            0            0            0            0            0 
##   EnvironSat       Gender   HourlyRate JobInvolvemt     JobLevel 
##            0            0            0            0            0 
##      JobRole       JobSat  MaritalStat      MnthInc     MnthRate 
##            0            0            0            0            0 
##  NumCoWorked       Over18     Overtime   PcntSalInc   PerfRating 
##            0            0            0            0            0 
##  RelationSat   StandHours   StckOptLev  TotalWrkYrs  TrainPrevYr 
##            0            0            0            0            0 
##  WorkLifeBal    YrsAtComp  YrsCurrRole  YrsSncPromo  YrsWtCurMgr 
##            0            0            0            0            0

None of the quick graphs above threw any errors indicating that missing values were removed before plotting. There are no NA values for any variable. Furthermore, there are no outliers or unusual observations that could indicate entry errors. It seems reasonable to proceed with the assumption that the data set is correct and relatively clean.

##   Over18 freq
## 1      Y 1470

As shown in a bar graph above, the Over 18 variable indicates that all employees are at least 18 years old. Furthermore, its total count matches the total number of observations. The minimum age (shown below), 18, confirms that there are no underage employees in the data set.

Before proceeding further, any variables with no variation (Employee Count, Over 18, and Standard Hours) can be removed because they have no useful information for predictive modeling. Employee ID can also be removed because observations can be tracked by other means. This results in a data frame of 1,470 rows and 31 columns.

3. Preliminary Analysis

3b. Descriptive stats

The following 2 tables show the descriptive statistics for 7 of the variables:

  • Age
  • Monthly Income (MnthInc)
  • Percent Salary Increase (PcntSalInc)
  • Years at Company (YrsAtComp)
  • Years at Current Role (YrsCurrRole)
  • Years Since Promotion (YrsSncPromo)
  • Years with Current Manager (YrsWtCurMgr)
Age Monthly Income % Salary Increase Years at Company
Min. :18.00 Min. : 1009 Min. :11.00 Min. : 0.000
1st Qu.:30.00 1st Qu.: 2911 1st Qu.:12.00 1st Qu.: 3.000
Median :36.00 Median : 4919 Median :14.00 Median : 5.000
Mean :36.92 Mean : 6503 Mean :15.21 Mean : 7.008
3rd Qu.:43.00 3rd Qu.: 8379 3rd Qu.:18.00 3rd Qu.: 9.000
Max. :60.00 Max. :19999 Max. :25.00 Max. :40.000
Years at Current Role Years Since Promotion Years with Current Manager
Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 2.000 1st Qu.: 0.000 1st Qu.: 2.000
Median : 3.000 Median : 1.000 Median : 3.000
Mean : 4.229 Mean : 2.188 Mean : 4.123
3rd Qu.: 7.000 3rd Qu.: 3.000 3rd Qu.: 7.000
Max. :18.000 Max. :15.000 Max. :17.000

The following is a histogram showing the distribution of Age in the company. Overlaid on the histogram is the density curve, which shows that the age distribution is slightly right-skewed, with much of the workforce between 29 and 35 - a relatively young workforce.

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   30.00   36.00   36.92   43.00   60.00

The following is a histogram of Monthly Income in the company with an overlay of the density curve showing the distribution of income. The distribution appears to be right skewed with around 300 employees making around $2000 per month.

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1009    2911    4919    6503    8379   19999

3c. Frequencies on Gender, Education, Occupation

The table command is used to count the employee data by a specified field type. The gender table shows that 60% of the employees in this database are male. The education table shows that the highest concentration of employees hold a Bachelor’s degree, followed closely by a Master’s degree. These two education levels account for 66% of the overall employee database.

Headcount By Gender
Gender Count
1 Female 588
2 Male 882
Headcount by Education
Education Level Count
1 Below College 170
2 College 282
3 Bachelor 572
4 Master 398
5 Doctor 48
Headcount By Job Role
Job Role Count
1 Healthcare Representative 131
2 Human Resources 52
3 Laboratory Technician 259
4 Manager 102
5 Manufacturing Director 145
6 Research Director 80
7 Research Scientist 292
8 Sales Executive 326
9 Sales Representative 83

3d. Counts of Management positions

Our analysis of the data did not base management positions solely on the job grade. Two tracks for promotion include the individual contributor and management track. The management track in our analysis includes positions in which employee line management would exist. Manager, Manufacturing Director, and Research Director fall into that category. Using this definition, 22% of the positions are management. All other positions were not considered management.

Headcount by Manager Role
Job Role Count
1 Manager 102
2 Manufacturing Director 145
3 Research Director 80

4. Deep Analysis

Correlation

DDSAnalytics’ main question of interest with this data set is to identify factors contributing to turnover, but other trends in the data may also be useful. Therefore, a visualization that shows how the variables correlate with each other is a good starting point for analysis.

The above correlogram requires the fully numeric version of the data frame. Any correlation involving a categorical variable with no intrinsic order, such as the strong negative correlation between Marital Status and Stock Option Level, requires more investigation to determine whether the correlation is meaningful or just a byproduct of the numeric coercion. In this plot, stronger colors (that allow viewing of the white text) are associated with stronger correlations. Hence, the diagonal of each variable against itself is dark blue with the expected Pearson’s r of 1.

Some of the following relationships stand out on a cursory exploration:

  • Age, Job Level, Monthly Income, Total Working Years, Years At Company, Years In Current Role, Years Since Last Promotion, and Years With Current Manager all show moderate to strong positive correlations with each other. This makes intuitive sense, as an older person with a longer career is more likely to hold higher positions and wield more earning potential. (Age and Monthly Income specifically are further explored in the Age and Income section.) By itself, Age also has weaker positive correlations with amount of Education and Number of Companies Worked.
  • Department and Job Role show a strong positive correlation. Though both are categorical variables, the organization within both happens to line up such that numbers for related levels trend together, such as Sales Representatives in the Sales department. This is further explored in the Job-Specific Trends section.
  • Hourly Rate, Daily Rate, Monthly Rate, and Monthly Income do not show any correlation. This finding is somewhat unexpected, as one would expect these monetary variables to show some sort of relationship. This is further explored in the Rates & Income section.
  • Performance Rating and Percent Salary Increase show a strong positive correlation; those who perform the best are likely given the biggest pay raises. Sure enough, only employees who reported an Outstanding performance rating (4 out of 4) were rewarded with raises above 20%, as shown below.

Individual correlations between Attrition and other variables are relatively weak. Of these, Overtime, Monthly Income, Job Level, and any of the variables related to career length (e.g., Age, Total Working Years) look the most promising. This is further explored in the Predicting Attrition section.

Rates & Income

The data set contains four variables related to money: Hourly Rate, Daily Rate, Monthly Rate, and Monthly Income. Their data definitions are not well-defined, but it can be reasonably assumed that the “Rate” variables relate to wages earned over the indicated unit of time and the “Income” variable translates to monthly take-home pay. If this were the case, it should be feasible to build a model to describe the relationship between them.

Scatterplots of the rates and income variables against each other do not show any discernible relationship; the correlation values, being near zero, would suggest that no relationship exists at all, supporting the earlier observation made about the correlogram. This is an unexpected result, as one would reasonably expect that these variables would trend together. Perhaps another variable not captured by the data, or the survey method by which the data was collected from employees, is responsible.

Another oddity reveals itself when we compare the minimum and maximum values for each variable.

Rates & Income Bounds
Minimum Maximum
Hourly Rate 30 100
Daily Rate 102 1499
Monthly Rate 2094 26999
Monthly Income 1009 19999

For all observations, Monthly Rate is greater than Daily Rate, which in turn is greater than Hourly Rate. This is expected. However, Monthly Rate is greater than Monthly Income for only 82.45% of the observations; in other words, 258 of the 1,470 employees reported a greater Monthly Income than Monthly Rate. Assuming the above interpretation of rate as pre-deduction and income as post-deduction earnings is accurate, why might this be? Investigating the reason for these strange trends is recommended.

4c. Age and Income

Analyzing the potential relationship between Age and income is another of DDSAnalytics’ questions of interest. Considering the earlier exploratory histograms and our assumed definitions, we use the Monthly Income variable over working with any of the rates.

## 
## Call:
## lm(formula = MnthInc ~ Age, data = employee_factor)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9990.1 -2592.7  -677.9  1810.5 12540.8 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept) -2970.67     443.70  -6.695      0.0000000000306 ***
## Age           256.57      11.67  21.995 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4084 on 1468 degrees of freedom
## Multiple R-squared:  0.2479, Adjusted R-squared:  0.2473 
## F-statistic: 483.8 on 1 and 1468 DF,  p-value: < 0.00000000000000022

A moderate positive correlation exists between Age and Monthly Income, with Pearson’s r = 0.4978546. Drawing a scatterplot between the two shows a general upward trend in income as Age increases, and the best-fitting regression line has a positive slope.

However, an early exploratory histogram showed that Monthly Income is right-skewed, as is normal for salary distributions. Furthermore, residual diagnostic plots show possible violations against linear regression assumptions, particularly in heteroscedasticity (normality is taken care of by the Central Limit Theorem, as this is a large sample). We can attempt a log transformation to try to mitigate heteroscedasticity.

## `mutate_each()` is deprecated.
## Use `mutate_all()`, `mutate_at()` or `mutate_if()` instead.
## To map `funs` over a selection of variables, use `mutate_at()`

## 
## Call:
## lm(formula = lMnthInc ~ Age, data = employee_log)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66052 -0.42428 -0.00638  0.42475  1.42043 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  7.22499    0.06277  115.11 <0.0000000000000002 ***
## Age          0.03595    0.00165   21.79 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5778 on 1468 degrees of freedom
## Multiple R-squared:  0.2443, Adjusted R-squared:  0.2438 
## F-statistic: 474.7 on 1 and 1468 DF,  p-value: < 0.00000000000000022

The correlation for Age and log(Monthly Income) drops slightly from before, with Pearson’s r = 0.4943101. However, the residual diagnostics look much better.

In any case, there is a definite positive correlation between Age and Monthly Income, supporting the observation made from the correlogram. Linear and log-linear regression models can be created, indicating that mean or median Monthly Income can likely be predicted from Age.

Predicting Attrition

In this data set 16.12%, or 237 of the 1470 employees attrited. Analyzing statistics and plots can help develop the profile of the type of employee who left, especially compared to those who stayed.

Developing the Profile

Attrition: Means for Numeric Variables
Attrition No Yes
Age 37.56 33.61
Distance From Home 8.92 10.63
Education 2.93 2.84
Environment Satisfaction 2.77 2.46
Job Involvement 2.77 2.52
Job Level 2.15 1.64
Job Satisfaction 2.78 2.47
Monthly Income 6832.74 4787.09
Number of Companies Worked 2.65 2.94
Percent Salary Increase 15.23 15.10
Performance Rating 3.15 3.16
Relationship Satisfaction 2.73 2.60
Stock Option Level 0.85 0.53
Total Working Years 11.86 8.24
Training Times Last Year 2.83 2.62
Work Life Balance 2.78 2.66
Years At Company 7.37 5.13
Years In Current Role 4.48 2.90
Years Since Last Promotion 2.23 1.95
Years With Current Manager 4.37 2.85
# Plot built with its legend left on, solely so the shared Attrition legend
# can be extracted from it and reused as its own panel.
att_age_a <- ContDensity(employee_factor, xvar="Age", yvar="Attrition")
attrition_legend <- get_legend(att_age_a)

# One legend-free density panel per numeric variable, colored by Attrition.
# Age.
att_age <- ContDensity(employee_factor, xvar="Age", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Age")
# Distance From Home.
att_distance <- ContDensity(employee_factor, xvar="DistFromHme", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Distance From Home")
# Monthly Income.
att_monthlyincome <- ContDensity(employee_factor, xvar="MnthInc", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Monthly Income")
# Percent Salary Increase.
att_salaryincrease <- ContDensity(employee_factor, xvar="PcntSalInc", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Percent Salary Increase")
# Number of Companies Worked.
att_numcompanies <- ContDensity(employee_factor, xvar="NumCoWorked", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Number of Companies Worked")
# Total Working Years.
att_totalyears <- ContDensity(employee_factor, xvar="TotalWrkYrs", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Total Working Years")
# Training Times Last Year.
att_trainprevyear <- ContDensity(employee_factor, xvar="TrainPrevYr", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Training Times Last Year")
# Years At Company.
att_yearscomp <- ContDensity(employee_factor, xvar="YrsAtComp", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Years At Company")
# Years In Current Role.
att_yearsrole <- ContDensity(employee_factor, xvar="YrsCurrRole", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Years In Current Role")
# Years Since Last Promotion.
att_yearspromo <- ContDensity(employee_factor, xvar="YrsSncPromo", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Years Since Last Promotion")
# Years With Current Manager.
att_yearsmanager <- ContDensity(employee_factor, xvar="YrsWtCurMgr", yvar="Attrition") +
  theme(legend.position="none") +
  labs(title="Years With Current Manager")

# Lay the eleven panels plus the shared legend out on a four-column grid
# under a single bold heading.
density_heading <- textGrob("Attrition Density Distributions",
                            gp=gpar(fontsize=20, fontface="bold"))
grid.arrange(grobs=list(att_age, att_distance, att_monthlyincome, att_salaryincrease,
                        att_numcompanies, att_totalyears, att_trainprevyear, att_yearscomp,
                        att_yearsrole, att_yearspromo, att_yearsmanager, attrition_legend),
             ncol=4,
             top=density_heading)

# Helper: horizontal dodged barplot of raw employee counts for one categorical
# variable, with one bar per Attrition value.
#   df:    data frame containing `var` and an Attrition column.
#   var:   name (string) of the categorical column.
#   title: plot title.
# Bars are colored with the file-level `attcol` palette; the legend is hidden.
AttritionCountBar <- function(df, var, title) {
  ggplot(df, aes_string(x=var, y="..count..")) +
    geom_bar(aes(fill=Attrition), position="dodge") +
    coord_flip() +
    ggtitle(title) +
    scale_fill_manual(values=attcol) +
    theme(axis.text.x=element_text(size=8),
          axis.text.y=element_text(size=8),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          legend.position="none",
          plot.title=element_text(size=10))
}

# Helper: horizontal barplot of the attrition percentage within each level of
# one categorical variable, ordered by that percentage and labelled with it.
#   df:     data frame containing `var` and an Attrition column.
#   var:    name (string) of the categorical column.
#   title:  plot title.
#   colors: optional vector passed to scale_fill_manual(); when NULL no manual
#           scale is added and ggplot2's default fill colors are used.
AttritionRateBar <- function(df, var, title, colors=NULL) {
  # Count employees per (level, Attrition) pair, convert the counts to a
  # percentage within each level, and keep only the "Yes" rows.
  rates <- df %>%
    dplyr::group_by_at(c(var, "Attrition")) %>%
    dplyr::summarise(n=n()) %>%
    dplyr::mutate(freq=round(100*n/sum(n),2)) %>%
    subset(Attrition=="Yes")

  rate_plot <- ggplot(rates, aes_string(x=paste0("reorder(", var, ", freq)"), y="freq")) +
    geom_bar(stat="identity", aes_string(fill=var)) +
    coord_flip()
  if (!is.null(colors)) {
    rate_plot <- rate_plot + scale_fill_manual(values=colors)
  }
  rate_plot +
    geom_text(aes_string(x=var, y="freq", group="freq", label="paste0(freq, '%')"),
              size=2, hjust=1) +
    ggtitle(title) +
    theme(axis.text=element_text(size=7),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          legend.position="none",
          plot.title=element_text(size=10))
}

# Attrition and Business Travel barplots.
att_travel_count <- AttritionCountBar(employee_factor, "BusTravel", "Business Travel: Counts")
att_travel_rate <- AttritionRateBar(employee_factor, "BusTravel",
                                    "Business Travel: Attrition Percentage", softcolors)

# Attrition and Department barplots.
att_dept_count <- AttritionCountBar(employee_factor, "Department", "Department: Counts")
att_dept_rate <- AttritionRateBar(employee_factor, "Department",
                                  "Department: Attrition Percentage", softcolors[4:6])

# Attrition and Education Field barplots.
att_edufield_count <- AttritionCountBar(employee_factor, "EduField", "Education Field: Counts")
att_edufield_rate <- AttritionRateBar(employee_factor, "EduField",
                                      "Education Field: Attrition Percentage", softcolors[1:6])

# Attrition and Gender barplots. No manual palette is passed for the rate
# plot, matching the original code, which used the default fill colors.
att_gender_count <- AttritionCountBar(employee_factor, "Gender", "Gender: Counts")
att_gender_rate <- AttritionRateBar(employee_factor, "Gender",
                                    "Gender: Attrition Percentage")

# Attrition and Marital Status barplots.
att_marital_count <- AttritionCountBar(employee_factor, "MaritalStat", "Marital Status: Counts")
att_marital_rate <- AttritionRateBar(employee_factor, "MaritalStat",
                                     "Marital Status: Attrition Percentage", softcolors[7:9])

# Attrition and Overtime barplots.
att_overtime_count <- AttritionCountBar(employee_factor, "Overtime", "Overtime: Counts")
att_overtime_rate <- AttritionRateBar(employee_factor, "Overtime",
                                      "Overtime: Attrition Percentage", attcol)

# Output combinations of count barplots and percentage barplots. Separated to expand height.
# NOTE(review): the gender, marital status, and overtime plots built above are
# not arranged here; presumably they are output by a later chunk -- confirm.
grid.arrange(att_travel_count, att_travel_rate,
             att_dept_count, att_dept_rate,
             att_edufield_count, att_edufield_rate, ncol=2)

From the averages, density distributions, and barplots, a general profile emerges on what the typical employee who leaves generally looks like compared to an employee who stays:

  • Younger, single, less educated
  • Holds lower job level with lower job involvement, lower stock option levels, and lower monthly income
  • Lives further from work, travels for business, and works overtime
  • Less satisfied with environment, job, and relationship; reports lower work-life balance
  • Earlier in career, fewer years with company
  • More likely to be in the human resources or sales departments; more likely to hold sales representative, laboratory technician, or human resources as job role

Of these factors, which are the biggest determinants for attrition, and is it possible to predict whether an employee will leave using them?

Building a Predictive Model

In each of the following models, the original data set is divided into training and testing subsets. We build a model based on the training data, and then see how accurately the model can predict known results in the testing data.

Logistic Regression

For a classification problem, logistic regression is a natural first model to try.

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  244  17
##        Yes  64  42
##                                           
##                Accuracy : 0.7793          
##                  95% CI : (0.7333, 0.8207)
##     No Information Rate : 0.8392          
##     P-Value [Acc > NIR] : 0.9989          
##                                           
##                   Kappa : 0.3813          
##  Mcnemar's Test P-Value : 0.0000003203    
##                                           
##             Sensitivity : 0.7119          
##             Specificity : 0.7922          
##          Pos Pred Value : 0.3962          
##          Neg Pred Value : 0.9349          
##              Prevalence : 0.1608          
##          Detection Rate : 0.1144          
##    Detection Prevalence : 0.2888          
##       Balanced Accuracy : 0.7520          
##                                           
##        'Positive' Class : Yes             
## 
##       Overall                    Predictors
## 1  9.23468519                   OvertimeYes
## 13 5.30016699                   YrsSncPromo
## 12 4.55903869                        JobSat
## 18 4.45542727                   NumCoWorked
## 14 3.98643034                    EnvironSat
## 15 3.70800969    BusTravelTravel_Frequently
## 9  3.42105195   JobRoleSales Representative
## 20 3.31646203                   DistFromHme
## 19 3.09523634                  JobInvolvemt
## 11 3.03681128                   YrsCurrRole
## 3  3.02737162  JobRoleLaboratory Technician
## 26 2.91397741                   WorkLifeBal
## 17 2.77481301                           Age
## 2  2.76483875        JobRoleHuman Resources
## 23 2.70011458                   RelationSat
## 22 2.49712400                   TrainPrevYr
## 27 2.28732195                    GenderMale
## 25 2.18097566             MaritalStatSingle
## 21 1.99944230                   YrsWtCurMgr
## 10 1.96311000                    StckOptLev
## 16 1.96253301        BusTravelTravel_Rarely
## 8  1.87260383        JobRoleSales Executive
## 33 1.59129282                   TotalWrkYrs
## 34 1.46137870                      MnthRate
## 32 1.37550019      EduFieldTechnical Degree
## 7  1.01670064     JobRoleResearch Scientist
## 6  0.69422240      JobRoleResearch Director
## 29 0.52064923             EduFieldMarketing
## 4  0.30134839                JobRoleManager
## 31 0.18152971                 EduFieldOther
## 24 0.17887285            MaritalStatMarried
## 5  0.11115739 JobRoleManufacturing Director
## 28 0.05074586         EduFieldLife Sciences
## 30 0.03539496               EduFieldMedical

This first model is 77.93% accurate, with 71.19% sensitivity for detecting those who attrite. Of note, it reports that Overtime, Years Since Promotion, Job Satisfaction, and Number of Companies Worked are among the most important predictors.

However, this initial model is on the original, untransformed data. Histograms from the exploratory data analysis show that some variables are heavily right skewed, such as Monthly Income. To keep these skewed values from having an outsized influence on our logistic regression model, we attempt to log transform the numeric variables where possible. Many variables regarding number of years contain zeros, which do not produce finite values when log transformed. These observations cannot be dropped because those employees are part of the target group we are trying to identify. Thus, we proceed with a select few variables transformed: Distance From Home, Monthly Income, and Percent Salary Increase. Additionally, we drop some of the “Years” variables that are highly correlated.

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  255  19
##        Yes  53  40
##                                           
##                Accuracy : 0.8038          
##                  95% CI : (0.7594, 0.8432)
##     No Information Rate : 0.8392          
##     P-Value [Acc > NIR] : 0.9699524       
##                                           
##                   Kappa : 0.4103          
##  Mcnemar's Test P-Value : 0.0001006       
##                                           
##             Sensitivity : 0.6780          
##             Specificity : 0.8279          
##          Pos Pred Value : 0.4301          
##          Neg Pred Value : 0.9307          
##              Prevalence : 0.1608          
##          Detection Rate : 0.1090          
##    Detection Prevalence : 0.2534          
##       Balanced Accuracy : 0.7529          
##                                           
##        'Positive' Class : Yes             
## 
##       Overall                    Predictors
## 1  9.37495551                   OvertimeYes
## 10 5.79860451                   StckOptLev1
## 35 4.97881976                   NumCoWorked
## 24 4.27694179                  WorkLifeBal3
## 15 4.21291937                       JobSat4
## 28 4.18683190                  lDistFromHme
## 21 4.14747331                   EnvironSat3
## 26 4.07091220    BusTravelTravel_Frequently
## 22 4.06621072                   EnvironSat4
## 31 3.99725267                 JobInvolvemt4
## 20 3.52296304                   EnvironSat2
## 32 3.49035549                  RelationSat2
## 33 3.46479129                  RelationSat3
## 34 3.28988064                  RelationSat4
## 30 3.26454129                 JobInvolvemt3
## 11 3.06788220                   StckOptLev2
## 29 2.65770246                 JobInvolvemt2
## 38 2.56554359                      lMnthInc
## 23 2.51532857                  WorkLifeBal2
## 45 2.50463010                   TotalWrkYrs
## 16 2.42879328                     JobLevel2
## 12 2.31500633                   StckOptLev3
## 44 2.23322044                   TrainPrevYr
## 25 2.11759944                  WorkLifeBal4
## 37 2.11076181                    GenderMale
## 8  2.01151427        JobRoleSales Executive
## 27 1.99457712        BusTravelTravel_Rarely
## 19 1.90676391                     JobLevel5
## 36 1.79395778                           Age
## 47 1.65102516                      MnthRate
## 6  1.62040116      JobRoleResearch Director
## 46 1.60964019                     DailyRate
## 7  1.27585887     JobRoleResearch Scientist
## 13 1.23659436                       JobSat2
## 18 1.01250743                     JobLevel4
## 43 0.99280930      EduFieldTechnical Degree
## 14 0.93906987                       JobSat3
## 9  0.81233095   JobRoleSales Representative
## 2  0.65748832        JobRoleHuman Resources
## 4  0.51576689                JobRoleManager
## 17 0.49693149                     JobLevel3
## 3  0.42934405  JobRoleLaboratory Technician
## 39 0.40713383         EduFieldLife Sciences
## 5  0.33630756 JobRoleManufacturing Director
## 41 0.28341888               EduFieldMedical
## 42 0.15266846                 EduFieldOther
## 40 0.08857312             EduFieldMarketing

The overall accuracy on this second model goes up slightly to 80.38%, but sensitivity for Attrition actually drops to 67.80%. In this model, Overtime, Stock Option Level, Number of Companies Worked, Job Satisfaction, Work Life Balance, Business Travel, and Environment Satisfaction are among the most important predictors.

k-Nearest Neighbors

Another model is k-nearest neighbors, which makes classifications based on Euclidean distances between observations. Because the numeric variables span very different ranges, we need to begin by normalizing them so that the variables with the largest ranges do not dominate the distance calculation.

## `mutate_each()` is deprecated.
## Use `mutate_all()`, `mutate_at()` or `mutate_if()` instead.
## To map `funs` over a selection of variables, use `mutate_at()`
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  300  55
##        Yes   8   4
##                                           
##                Accuracy : 0.8283          
##                  95% CI : (0.7858, 0.8655)
##     No Information Rate : 0.8392          
##     P-Value [Acc > NIR] : 0.7418          
##                                           
##                   Kappa : 0.0617          
##  Mcnemar's Test P-Value : 0.000000006814  
##                                           
##             Sensitivity : 0.0678          
##             Specificity : 0.9740          
##          Pos Pred Value : 0.3333          
##          Neg Pred Value : 0.8451          
##              Prevalence : 0.1608          
##          Detection Rate : 0.0109          
##    Detection Prevalence : 0.0327          
##       Balanced Accuracy : 0.5209          
##                                           
##        'Positive' Class : Yes             
## 
## ROC curve variable importance
## 
##   only 20 most important variables shown (out of 30)
## 
##              Importance
## MnthInc          100.00
## TotalWrkYrs       97.53
## YrsAtComp         92.77
## Overtime          89.54
## YrsCurrRole       88.22
## JobLevel          88.21
## Age               84.47
## YrsWtCurMgr       80.43
## StckOptLev        78.26
## MaritalStat       65.86
## JobSat            46.97
## JobInvolvemt      38.90
## EnvironSat        30.80
## DistFromHme       29.57
## JobRole           27.34
## Education         22.27
## DailyRate         22.08
## EduField          21.64
## TrainPrevYr       20.53
## YrsSncPromo       16.54

This model has an 82.83% accuracy rate and is good at picking out true negatives. However, its sensitivity for identifying Attrition is very low, at 6.78%. In other words, it is not a good model for picking out those who would attrite. For what it’s worth, the model identifies Monthly Income, Total Work Years, Years At Company, Overtime, and Years In Current Role as the most important predictors.

Decision Tree/Random Forest

Another classification method is the decision tree.

## 
## Classification tree:
## rpart(formula = Attrition ~ ., data = train_subset, method = "class")
## 
## Variables actually used in tree construction:
## [1] Age         DailyRate   EnvironSat  JobRole     MnthInc     MnthRate   
## [7] Overtime    StckOptLev  TotalWrkYrs
## 
## Root node error: 178/1103 = 0.16138
## 
## n= 1103 
## 
##         CP nsplit rel error  xerror     xstd
## 1 0.067416      0   1.00000 1.00000 0.068639
## 2 0.028090      2   0.86517 1.02247 0.069256
## 3 0.022472      4   0.80899 0.98876 0.068326
## 4 0.013109      8   0.71910 0.94944 0.067206
## 5 0.010000     13   0.64607 1.00562 0.068795
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  294  40
##        Yes  14  19
##                                           
##                Accuracy : 0.8529          
##                  95% CI : (0.8124, 0.8875)
##     No Information Rate : 0.8392          
##     P-Value [Acc > NIR] : 0.2643852       
##                                           
##                   Kappa : 0.3365          
##  Mcnemar's Test P-Value : 0.0006688       
##                                           
##             Sensitivity : 0.32203         
##             Specificity : 0.95455         
##          Pos Pred Value : 0.57576         
##          Neg Pred Value : 0.88024         
##              Prevalence : 0.16076         
##          Detection Rate : 0.05177         
##    Detection Prevalence : 0.08992         
##       Balanced Accuracy : 0.63829         
##                                           
##        'Positive' Class : Yes             
## 

The decision tree fares slightly better than the k-NN model, with 85.29% overall accuracy and 32.20% sensitivity to Attrition. Still, this is not particularly good for identifying attriting employees.

A random forest model is an ensemble of decision trees. Theoretically, the crowd’s performance should exceed that of the individual.

##                 No   Yes MeanDecreaseAccuracy MeanDecreaseGini
## Overtime      8.53 12.04                12.91            16.83
## JobRole       8.44  3.72                 9.17            14.81
## TotalWrkYrs   7.29  3.36                 8.98            15.00
## MnthInc       7.27  4.48                 8.73            23.64
## Age           4.79  4.58                 6.47            20.04
## JobLevel      4.42  5.19                 6.03             6.33
## StckOptLev    4.41  4.99                 6.00             8.97
## MaritalStat   4.34  3.09                 5.62             5.58
## YrsWtCurMgr   4.77  1.41                 4.89             8.96
## YrsAtComp     3.51  2.19                 4.48            11.13
## JobSat        2.04  4.90                 4.42             8.34
## YrsCurrRole   3.87  1.43                 4.39             7.27
## EduField      3.27  2.78                 4.12            11.37
## BusTravel     2.62  2.39                 3.19             5.00
## JobInvolvemt  2.38  2.32                 3.15             6.42
## WorkLifeBal   1.41  3.56                 2.94             8.23
## NumCoWorked   3.03 -0.36                 2.72             9.23
## DistFromHme   2.18  0.06                 2.02            13.57
## Department    0.53  2.06                 1.38             2.84
## Education     0.69  0.92                 1.01             5.33
## DailyRate     0.46  0.37                 0.57            16.03
## PerfRating    0.83 -1.06                 0.42             1.24
## RelationSat   0.51 -0.20                 0.33             6.89
## YrsSncPromo   1.22 -1.97                 0.07             7.13
## EnvironSat   -0.91  1.81                -0.03             8.50
## HourlyRate    0.03 -0.35                -0.14            14.55
## MnthRate      0.00 -1.61                -0.82            14.83
## TrainPrevYr  -0.86 -0.23                -0.82             6.53
## PcntSalInc   -1.06  0.25                -0.88            10.09
## Gender       -1.03 -0.97                -1.35             2.41
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  306   2
##        Yes  46  13
##                                          
##                Accuracy : 0.8692         
##                  95% CI : (0.8304, 0.902)
##     No Information Rate : 0.9591         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.3061         
##  Mcnemar's Test P-Value : 0.0000000005417
##                                          
##             Sensitivity : 0.86667        
##             Specificity : 0.86932        
##          Pos Pred Value : 0.22034        
##          Neg Pred Value : 0.99351        
##              Prevalence : 0.04087        
##          Detection Rate : 0.03542        
##    Detection Prevalence : 0.16076        
##       Balanced Accuracy : 0.86799        
##                                          
##        'Positive' Class : Yes            
## 

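Indeed, the ensemble has the highest overall accuracy of the models attempted, at 86.92%. The reported 86.67% sensitivity, however, should be read with caution. The rows of this confusion matrix sum to 308 and 59, which are the actual No and Yes counts in the test set used for every other model, so the prediction and reference arguments appear to have been swapped. If so, 86.67% (13 of 15) is really the precision of the model’s Yes predictions, and its true sensitivity at detecting actual attriting employees is 13 of 59, or 22.03%, with 46 false negatives and only 2 false positives. Because false negatives (people predicted not to attrite who actually do) are more costly than false positives, minimizing that metric is more important, and the confusion matrix should be regenerated and checked before the random forest is treated as our best model.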

This model lists Overtime, Job Role, Total Work Years, and Monthly Income as its overall most important predictors. For predicting those who attrite specifically, Overtime, Job Level, Stock Option Level (likely a symptom of other predictors like Job Level), and Job Satisfaction are most significant, with Age and Monthly Income following behind.

4d. Life Satisfaction

One final question of interest posed is whether life satisfaction can be related to this data set. None of the original 35 variables directly address life satisfaction (i.e., the employees were not explicitly asked to rate life satisfaction). Furthermore, criteria for life satisfaction likely vary between employees, and some may not even be captured by any of the available variables (e.g., quality of relationships with family and friends, status of physical and mental health, etc.). Therefore, the following analysis using the original data set is purely speculative.

We define a Life Satisfaction variable by averaging Job Satisfaction, Relationship Satisfaction, and Work Life Balance, preexisting variables that capture some aspects of employees’ lives both in and out of work. Conveniently, these are all variables in which employees were asked to rate their feelings on a scale of 1 to 4; an actual Life Satisfaction question would likely be answered in a similar fashion. Investigating whether this created variable varies over Age, Monthly Income, and Gender seems like a natural line of inquiry.

In the original data definitions, the Satisfaction variables were defined on a 1-4 scale of Low-Medium-High-Very High, while Work-Life Balance was defined as Bad-Good-Better-Best. The scales seem similar enough that our defined Life Satisfaction variable can take on the Low-Medium-High-Very High interpretation.

The linear regression slopes are relatively flat, which could be interpreted in a few different ways. Perhaps Life Satisfaction remains relatively stable across the ranges of Age and Monthly Income. Or maybe no correlation actually exists. Our defined Life Satisfaction variable might not even adequately capture what we want.

Although there is very little change in Life Satisfaction as either Age or Monthly Income increases, the separation by Gender shows an interesting result. Though their confidence intervals overlap, the mean Life Satisfaction indicated by the linear regression line for Males is higher than that for Females for all Ages in the data set. For Monthly Income, mean Life Satisfaction is higher for Males below $10,000, but slightly higher for Females at the high end near $20,000.

Going forward, explicitly asking a survey question about Life Satisfaction would be helpful if DDSAnalytics wants to know that information.

Deep Dive

Repeat the charts and plots using only the employees who have attrited.

Among those who have attrited, focus on the employees who were at the company for less than 2 years.

##      Age Attrition         BusTravel DailyRate             Department
## 3     37       Yes     Travel_Rarely      1373 Research & Development
## 34    39       Yes     Travel_Rarely       895                  Sales
## 35    24       Yes     Travel_Rarely       813 Research & Development
## 43    26       Yes     Travel_Rarely      1357 Research & Development
## 51    48       Yes     Travel_Rarely       626 Research & Development
## 52    28       Yes     Travel_Rarely      1434 Research & Development
## 70    36       Yes     Travel_Rarely       318 Research & Development
## 103   20       Yes Travel_Frequently       871 Research & Development
## 128   19       Yes     Travel_Rarely       528                  Sales
## 133   31       Yes     Travel_Rarely       542                  Sales
## 172   19       Yes Travel_Frequently       602                  Sales
## 178   19       Yes     Travel_Rarely       303 Research & Development
## 193   35       Yes     Travel_Rarely       556 Research & Development
## 205   38       Yes     Travel_Rarely      1180 Research & Development
## 251   37       Yes Travel_Frequently       504 Research & Development
## 265   28       Yes     Travel_Rarely       529 Research & Development
## 287   44       Yes Travel_Frequently       920 Research & Development
## 297   18       Yes     Travel_Rarely       230 Research & Development
## 328   39       Yes     Travel_Rarely      1162                  Sales
## 364   33       Yes     Travel_Rarely       350                  Sales
## 371   21       Yes     Travel_Rarely       156                  Sales
## 386   30       Yes Travel_Frequently       464 Research & Development
## 406   25       Yes     Travel_Rarely       688 Research & Development
## 416   34       Yes Travel_Frequently       296                  Sales
## 422   29       Yes     Travel_Rarely       408 Research & Development
## 423   19       Yes     Travel_Rarely       489        Human Resources
## 440   31       Yes Travel_Frequently       534 Research & Development
## 444   22       Yes Travel_Frequently      1368 Research & Development
## 458   18       Yes Travel_Frequently      1306                  Sales
## 464   26       Yes     Travel_Rarely       471 Research & Development
## 481   30       Yes Travel_Frequently       448                  Sales
## 505   45       Yes Travel_Frequently       306                  Sales
## 514   20       Yes     Travel_Rarely      1362 Research & Development
## 526   24       Yes     Travel_Rarely       693                  Sales
## 548   42       Yes Travel_Frequently       933 Research & Development
## 586   23       Yes     Travel_Rarely      1243 Research & Development
## 590   29       Yes     Travel_Rarely       805 Research & Development
## 599   28       Yes     Travel_Rarely       890 Research & Development
## 657   32       Yes     Travel_Rarely       374 Research & Development
## 661   58       Yes Travel_Frequently       781 Research & Development
## 663   20       Yes     Travel_Rarely       500                  Sales
## 664   21       Yes     Travel_Rarely      1427 Research & Development
## 670   39       Yes     Travel_Rarely      1122 Research & Development
## 684   25       Yes     Travel_Rarely       867                  Sales
## 689   19       Yes     Travel_Rarely       419                  Sales
## 690   20       Yes     Travel_Rarely       129 Research & Development
## 701   58       Yes     Travel_Rarely       289 Research & Development
## 707   40       Yes        Non-Travel      1479                  Sales
## 712   29       Yes     Travel_Rarely       906 Research & Development
## 732   20       Yes     Travel_Rarely      1097 Research & Development
## 745   37       Yes     Travel_Rarely      1141 Research & Development
## 749   26       Yes        Non-Travel       265                  Sales
## 762   36       Yes     Travel_Rarely       660 Research & Development
## 777   20       Yes Travel_Frequently       769                  Sales
## 778   21       Yes     Travel_Rarely      1334 Research & Development
## 798   26       Yes     Travel_Rarely      1330 Research & Development
## 799   33       Yes     Travel_Rarely      1017 Research & Development
## 801   28       Yes Travel_Frequently      1009 Research & Development
## 802   50       Yes Travel_Frequently       959                  Sales
## 829   18       Yes        Non-Travel       247 Research & Development
## 832   31       Yes Travel_Frequently       874 Research & Development
## 843   28       Yes     Travel_Rarely      1485 Research & Development
## 861   22       Yes Travel_Frequently      1256 Research & Development
## 865   41       Yes        Non-Travel       906 Research & Development
## 872   24       Yes     Travel_Rarely       984 Research & Development
## 893   19       Yes        Non-Travel       504 Research & Development
## 912   25       Yes Travel_Frequently       599                  Sales
## 916   21       Yes Travel_Frequently       251 Research & Development
## 981   31       Yes Travel_Frequently       703                  Sales
## 1013  31       Yes Travel_Frequently       667                  Sales
## 1017  31       Yes     Travel_Rarely       202 Research & Development
## 1033  39       Yes        Non-Travel       592 Research & Development
## 1037  31       Yes Travel_Frequently       523 Research & Development
## 1040  34       Yes     Travel_Rarely      1107        Human Resources
## 1061  24       Yes Travel_Frequently       381 Research & Development
## 1069  28       Yes Travel_Frequently       289 Research & Development
## 1111  35       Yes     Travel_Rarely       104 Research & Development
## 1137  28       Yes     Travel_Rarely       329 Research & Development
## 1154  18       Yes Travel_Frequently       544                  Sales
## 1168  35       Yes     Travel_Rarely       763                  Sales
## 1206  32       Yes     Travel_Rarely      1259 Research & Development
## 1223  24       Yes     Travel_Rarely       240        Human Resources
## 1237  36       Yes     Travel_Rarely      1456                  Sales
## 1238  32       Yes     Travel_Rarely       964                  Sales
## 1250  29       Yes     Travel_Rarely       428                  Sales
## 1256  33       Yes     Travel_Rarely       211                  Sales
## 1258  31       Yes     Travel_Rarely      1079                  Sales
## 1263  43       Yes Travel_Frequently       807 Research & Development
## 1272  21       Yes     Travel_Rarely       337                  Sales
## 1274  22       Yes     Travel_Rarely      1294 Research & Development
## 1313  31       Yes     Travel_Rarely       359        Human Resources
## 1314  29       Yes     Travel_Rarely       350        Human Resources
## 1327  32       Yes     Travel_Rarely       414                  Sales
## 1333  29       Yes Travel_Frequently       459 Research & Development
## 1339  30       Yes     Travel_Rarely       945                  Sales
## 1340  22       Yes     Travel_Rarely       391 Research & Development
## 1366  29       Yes Travel_Frequently       746                  Sales
## 1370  28       Yes     Travel_Rarely      1475                  Sales
## 1380  27       Yes Travel_Frequently      1337        Human Resources
## 1397  53       Yes     Travel_Rarely      1168                  Sales
## 1439  23       Yes Travel_Frequently       638                  Sales
## 1443  29       Yes     Travel_Rarely      1092 Research & Development
##      DistFromHme Education         EduField EnvironSat Gender HourlyRate
## 3              2         2            Other          4   Male         92
## 34             5         3 Technical Degree          4   Male         56
## 35             1         3          Medical          2   Male         61
## 43            25         3    Life Sciences          1   Male         48
## 51             1         2    Life Sciences          1   Male         98
## 52             5         4 Technical Degree          3   Male         50
## 70             9         3          Medical          4   Male         79
## 103            6         3    Life Sciences          4 Female         66
## 128           22         1        Marketing          4   Male         50
## 133           20         3    Life Sciences          2 Female         71
## 172            1         1 Technical Degree          3 Female        100
## 178            2         3    Life Sciences          2   Male         47
## 193           23         2    Life Sciences          2   Male         50
## 205           29         1          Medical          2   Male         70
## 251           10         3          Medical          1   Male         61
## 265            2         4    Life Sciences          1   Male         79
## 287           24         3    Life Sciences          4   Male         43
## 297            3         3    Life Sciences          3   Male         54
## 328            3         2          Medical          4 Female         41
## 364            5         3        Marketing          4 Female         34
## 371           12         3    Life Sciences          3 Female         90
## 386            4         3 Technical Degree          3   Male         40
## 406            3         3          Medical          1   Male         91
## 416            6         2        Marketing          4 Female         33
## 422           25         5 Technical Degree          3 Female         71
## 423            2         2 Technical Degree          1   Male         52
## 440           20         3    Life Sciences          1   Male         66
## 444            4         1 Technical Degree          3   Male         99
## 458            5         3        Marketing          2   Male         69
## 464           24         3 Technical Degree          3   Male         66
## 481           12         4    Life Sciences          2   Male         74
## 505           26         4    Life Sciences          1 Female        100
## 514           10         1          Medical          4   Male         32
## 526            3         2    Life Sciences          1 Female         65
## 548           19         3          Medical          3   Male         57
## 586            6         3    Life Sciences          3   Male         63
## 590            1         2    Life Sciences          2 Female         36
## 599            2         4          Medical          3   Male         46
## 657           25         4    Life Sciences          1   Male         87
## 661            2         1    Life Sciences          4   Male         57
## 663            2         3          Medical          3 Female         49
## 664           18         1            Other          4 Female         65
## 670            6         3          Medical          4   Male         70
## 684           19         2        Marketing          3   Male         36
## 689           21         3            Other          4   Male         37
## 690            4         3 Technical Degree          1   Male         84
## 701            2         3 Technical Degree          4   Male         51
## 707           24         3    Life Sciences          2 Female        100
## 712           10         3    Life Sciences          4 Female         92
## 732           11         3          Medical          4 Female         98
## 745           11         2          Medical          1 Female         61
## 749           29         2          Medical          2   Male         79
## 762           15         3            Other          1   Male         81
## 777            9         3        Marketing          4 Female         54
## 778           10         3    Life Sciences          3 Female         36
## 798           21         3          Medical          1   Male         37
## 799           25         3          Medical          1   Male         55
## 801            1         3          Medical          1   Male         45
## 802            1         4            Other          4   Male         81
## 829            8         1          Medical          3   Male         80
## 832           15         3          Medical          3   Male         72
## 843           12         1    Life Sciences          3 Female         79
## 861            3         4    Life Sciences          3   Male         48
## 865            5         2    Life Sciences          1   Male         95
## 872           17         2    Life Sciences          4 Female         97
## 893           10         3          Medical          1 Female         96
## 912           24         1    Life Sciences          3   Male         73
## 916           10         2    Life Sciences          1 Female         45
## 981            2         3    Life Sciences          3 Female         90
## 1013           1         4    Life Sciences          2 Female         50
## 1017           8         3    Life Sciences          1 Female         34
## 1033           2         3    Life Sciences          1 Female         54
## 1037           2         3    Life Sciences          2   Male         94
## 1040           9         4 Technical Degree          1 Female         52
## 1061           9         3          Medical          2   Male         89
## 1069           2         2          Medical          3   Male         38
## 1111           2         3    Life Sciences          1 Female         69
## 1137          24         3          Medical          3   Male         51
## 1154           3         2          Medical          2 Female         70
## 1168          15         2          Medical          1   Male         59
## 1206           2         4    Life Sciences          4   Male         95
## 1223          22         1  Human Resources          4   Male         58
## 1237          13         5        Marketing          2   Male         96
## 1238           1         2    Life Sciences          1   Male         34
## 1250           9         3        Marketing          2 Female         52
## 1256          16         3    Life Sciences          1 Female         74
## 1258          16         4        Marketing          1   Male         70
## 1263          17         3 Technical Degree          3   Male         38
## 1272           7         1        Marketing          2   Male         31
## 1274           8         1          Medical          3 Female         79
## 1313          18         5  Human Resources          4   Male         89
## 1314          13         3  Human Resources          1   Male         56
## 1327           2         4        Marketing          3   Male         82
## 1333          24         2    Life Sciences          4   Male         73
## 1339           9         3          Medical          2   Male         89
## 1340           7         1    Life Sciences          4   Male         75
## 1366          24         3 Technical Degree          3   Male         45
## 1370          13         2        Marketing          4 Female         84
## 1380          22         3  Human Resources          1 Female         58
## 1397          24         4    Life Sciences          1   Male         66
## 1439           9         3        Marketing          4   Male         33
## 1443           1         4          Medical          1   Male         36
##      JobInvolvemt JobLevel                   JobRole JobSat MaritalStat
## 3               2        1     Laboratory Technician      3      Single
## 34              3        2      Sales Representative      4     Married
## 35              3        1        Research Scientist      4     Married
## 43              1        1     Laboratory Technician      3      Single
## 51              2        3     Laboratory Technician      3      Single
## 52              3        1     Laboratory Technician      3      Single
## 70              2        1        Research Scientist      3     Married
## 103             2        1     Laboratory Technician      4      Single
## 128             3        1      Sales Representative      3      Single
## 133             1        2           Sales Executive      3     Married
## 172             1        1      Sales Representative      1      Single
## 178             2        1     Laboratory Technician      4      Single
## 193             2        2    Manufacturing Director      3     Married
## 205             3        2 Healthcare Representative      1     Married
## 251             3        3    Manufacturing Director      3    Divorced
## 265             3        1     Laboratory Technician      3      Single
## 287             3        1     Laboratory Technician      3    Divorced
## 297             3        1     Laboratory Technician      3      Single
## 328             3        2           Sales Executive      3     Married
## 364             3        1      Sales Representative      3      Single
## 371             4        1      Sales Representative      2      Single
## 386             3        1        Research Scientist      4      Single
## 406             3        1     Laboratory Technician      1     Married
## 416             1        1      Sales Representative      3    Divorced
## 422             2        1        Research Scientist      2     Married
## 423             2        1           Human Resources      4      Single
## 440             3        3 Healthcare Representative      3     Married
## 444             2        1     Laboratory Technician      3      Single
## 458             3        1      Sales Representative      2      Single
## 464             1        1     Laboratory Technician      4      Single
## 481             2        1      Sales Representative      1     Married
## 505             3        2           Sales Executive      1     Married
## 514             3        1        Research Scientist      3      Single
## 526             3        2           Sales Executive      3      Single
## 548             4        1        Research Scientist      3    Divorced
## 586             4        1     Laboratory Technician      1     Married
## 590             2        1     Laboratory Technician      1     Married
## 599             3        1        Research Scientist      3      Single
## 657             3        1     Laboratory Technician      4      Single
## 661             2        1     Laboratory Technician      4    Divorced
## 663             2        1      Sales Representative      3      Single
## 664             3        1        Research Scientist      4      Single
## 670             3        1     Laboratory Technician      1     Married
## 684             2        1      Sales Representative      2     Married
## 689             2        1      Sales Representative      2      Single
## 690             3        1     Laboratory Technician      1      Single
## 701             3        1        Research Scientist      3      Single
## 707             4        4           Sales Executive      2      Single
## 712             2        1        Research Scientist      1      Single
## 732             2        1        Research Scientist      1      Single
## 745             1        2 Healthcare Representative      2     Married
## 749             1        2           Sales Executive      1      Single
## 762             3        2     Laboratory Technician      3    Divorced
## 777             3        1      Sales Representative      4      Single
## 778             2        1     Laboratory Technician      1      Single
## 798             3        1     Laboratory Technician      3    Divorced
## 799             2        1        Research Scientist      2      Single
## 801             2        1     Laboratory Technician      2    Divorced
## 802             3        2           Sales Executive      3      Single
## 829             3        1     Laboratory Technician      3      Single
## 832             3        1     Laboratory Technician      3     Married
## 843             3        1     Laboratory Technician      4     Married
## 861             2        1        Research Scientist      4     Married
## 865             2        1        Research Scientist      1    Divorced
## 872             3        1     Laboratory Technician      2     Married
## 893             2        1        Research Scientist      2      Single
## 912             1        1      Sales Representative      4      Single
## 916             2        1     Laboratory Technician      3      Single
## 981             2        1      Sales Representative      4      Single
## 1013            1        1      Sales Representative      3      Single
## 1017            2        1        Research Scientist      2      Single
## 1033            2        1     Laboratory Technician      1      Single
## 1037            3        1     Laboratory Technician      4     Married
## 1040            3        1           Human Resources      3     Married
## 1061            3        1     Laboratory Technician      1      Single
## 1069            2        1     Laboratory Technician      1      Single
## 1111            3        1     Laboratory Technician      1    Divorced
## 1137            3        1     Laboratory Technician      2     Married
## 1154            3        1      Sales Representative      4      Single
## 1168            1        2           Sales Executive      4    Divorced
## 1206            3        1     Laboratory Technician      2      Single
## 1223            1        1           Human Resources      3     Married
## 1237            2        2           Sales Executive      1    Divorced
## 1238            1        2           Sales Executive      2      Single
## 1250            1        1      Sales Representative      2      Single
## 1256            3        3           Sales Executive      1      Single
## 1258            3        3           Sales Executive      3     Married
## 1263            2        1        Research Scientist      3     Married
## 1272            3        1      Sales Representative      2      Single
## 1274            3        1     Laboratory Technician      1     Married
## 1313            4        1           Human Resources      1     Married
## 1314            2        1           Human Resources      1    Divorced
## 1327            2        2           Sales Executive      2      Single
## 1333            2        1        Research Scientist      4      Single
## 1339            3        1      Sales Representative      4      Single
## 1340            3        1        Research Scientist      2      Single
## 1366            4        1      Sales Representative      1      Single
## 1370            3        2           Sales Executive      3      Single
## 1380            2        1           Human Resources      2     Married
## 1397            3        3           Sales Executive      1      Single
## 1439            3        1      Sales Representative      1     Married
## 1443            3        1        Research Scientist      4     Married
##      MnthInc MnthRate NumCoWorked Overtime PcntSalInc PerfRating
## 3       2090     2396           6      Yes         15          3
## 34      2086     3335           3       No         14          3
## 35      2293     3020           2      Yes         16          3
## 43      2293    10558           1       No         12          3
## 51      5381    19294           9      Yes         13          3
## 52      3441    11179           1      Yes         13          3
## 70      3388    21777           0      Yes         17          3
## 103     2926    19783           1      Yes         18          3
## 128     1675    26820           1      Yes         19          3
## 133     4559    24788           3      Yes         11          3
## 172     2325    20989           0       No         21          4
## 178     1102     9241           1       No         22          4
## 193     5916    15497           3      Yes         13          3
## 205     6673    11354           7      Yes         19          3
## 251    10048    22573           6       No         11          3
## 265     3485    14935           2       No         11          3
## 287     3161    19920           3      Yes         22          4
## 297     1420    25233           1       No         13          3
## 328     5238    17778           4      Yes         18          3
## 364     2851     9150           1      Yes         13          3
## 371     2716    25422           1       No         15          3
## 386     2285     3427           9      Yes         23          4
## 406     4031     9396           5       No         13          3
## 416     2351    12253           0       No         16          3
## 422     2546    18300           5       No         16          3
## 423     2564    18437           1       No         12          3
## 440     9824    22908           3       No         12          3
## 444     3894     9129           5       No         16          3
## 458     1878     8059           1      Yes         14          3
## 464     2340    23213           1      Yes         18          3
## 481     2033    14470           1       No         18          3
## 505     4286     5630           2       No         14          3
## 514     1009    26999           1      Yes         11          3
## 526     4577    24785           9       No         14          3
## 548     2759    20366           6      Yes         12          3
## 586     1601     3445           1      Yes         21          4
## 590     2319     6689           1      Yes         11          3
## 599     4382    16374           6       No         17          3
## 657     2795    18016           1      Yes         24          4
## 661     2380    13384           9      Yes         14          3
## 663     2044    22052           1       No         13          3
## 664     2693     8870           1       No         19          3
## 670     2404     4303           7      Yes         21          4
## 684     2413    18798           1      Yes         18          3
## 689     2121     9947           1      Yes         13          3
## 690     2973    13008           1       No         19          3
## 701     2479    26227           4       No         24          4
## 707    13194    17071           4      Yes         16          3
## 712     2404    11479           6      Yes         20          4
## 732     2600    18275           1      Yes         15          3
## 745     4777    14382           5       No         15          3
## 749     4969    21813           8       No         18          3
## 762     4834     7858           7       No         14          3
## 777     2323    17205           1      Yes         14          3
## 778     1416    17258           1       No         13          3
## 798     2377    19373           1       No         20          4
## 799     2313     2993           4      Yes         20          4
## 801     2596     7160           1       No         15          3
## 802     4728    17251           3      Yes         14          3
## 829     1904    13556           1       No         12          3
## 832     2610     6233           1       No         12          3
## 843     2515    22955           1      Yes         11          3
## 861     2853     4223           0      Yes         11          3
## 865     2107    20293           6       No         17          3
## 872     2210     3372           1       No         13          3
## 893     1859     6148           1      Yes         25          4
## 912     1118     8040           1      Yes         14          3
## 916     2625    25308           1       No         20          4
## 981     2785    11882           7       No         14          3
## 1013    1359    16154           1       No         12          3
## 1017    1261    22262           1       No         12          3
## 1033    3646    17181           2      Yes         23          4
## 1037    3722    21081           6      Yes         13          3
## 1040    2742     3072           1       No         15          3
## 1061    3172    16998           2      Yes         11          3
## 1069    2561     5355           7       No         11          3
## 1111    2074    26619           1      Yes         12          3
## 1137    2408     7324           1      Yes         17          3
## 1154    1569    18420           1      Yes         12          3
## 1168    5440    22098           6      Yes         14          3
## 1206    1393    24852           1       No         12          3
## 1223    1555    11585           1       No         11          3
## 1237    6134     8658           5      Yes         13          3
## 1238    6735    12147           6       No         15          3
## 1250    2760    14630           1       No         13          3
## 1256    8564    10092           2      Yes         20          4
## 1258    8161    19002           2       No         13          3
## 1263    2437    15587           9      Yes         16          3
## 1272    2679     4567           1       No         13          3
## 1274    2398    15999           1      Yes         17          3
## 1313    2956    21495           0       No         17          3
## 1314    2335     3157           4      Yes         15          3
## 1327    9907    26186           7      Yes         12          3
## 1333    2439    14753           1      Yes         24          4
## 1339    1081    16019           1       No         13          3
## 1340    2472    26092           1      Yes         23          4
## 1366    1091    10642           1       No         17          3
## 1370    9854    23352           3      Yes         11          3
## 1380    2863    19555           1       No         12          3
## 1397   10448     5843           6      Yes         13          3
## 1439    1790    26956           1       No         19          3
## 1443    4787    26124           9      Yes         14          3
##      RelationSat StckOptLev TotalWrkYrs TrainPrevYr WorkLifeBal YrsAtComp
## 3              2          0           7           3           3         0
## 34             3          1          19           6           4         1
## 35             1          1           6           2           2         2
## 43             3          0           1           2           2         1
## 51             4          0          23           2           3         1
## 52             3          0           2           3           2         2
## 70             1          1           2           0           2         1
## 103            2          0           1           5           3         1
## 128            4          0           0           2           2         0
## 133            3          1           4           2           3         2
## 172            1          0           1           5           4         0
## 178            3          0           1           3           2         1
## 193            1          0           8           1           3         1
## 205            2          0          17           2           3         1
## 251            2          2          17           5           3         1
## 265            3          0           5           5           1         0
## 287            4          1          19           0           1         1
## 297            3          0           0           2           3         0
## 328            1          0          12           3           2         1
## 364            2          0           1           2           3         1
## 371            4          0           1           0           3         1
## 386            3          0           3           4           3         1
## 406            3          1           6           5           3         2
## 416            4          1           3           3           2         2
## 422            2          0           6           2           4         2
## 423            3          0           1           3           4         1
## 440            1          0          12           2           3         1
## 444            3          0           4           3           3         2
## 458            4          0           0           3           3         0
## 464            2          0           1           3           1         1
## 481            3          1           1           2           4         1
## 505            4          2           5           4           3         1
## 514            4          0           1           5           3         1
## 526            1          0           4           3           3         2
## 548            4          0           7           2           3         2
## 586            3          2           1           2           3         0
## 590            4          1           1           1           3         1
## 599            4          0           5           3           2         2
## 657            3          0           1           2           1         1
## 661            4          1           3           3           2         1
## 663            4          0           2           3           2         2
## 664            1          0           1           3           2         1
## 670            4          0           8           2           1         2
## 684            3          3           1           2           3         1
## 689            2          0           1           3           4         1
## 690            2          0           1           2           3         1
## 701            1          0           7           4           3         1
## 707            4          0          22           2           2         1
## 712            3          0           3           5           3         0
## 732            1          0           1           2           3         1
## 745            1          0          15           2           1         1
## 749            4          0           7           6           3         2
## 762            2          1           9           3           2         1
## 777            2          0           2           3           3         2
## 778            1          0           1           6           2         1
## 798            3          1           1           0           2         1
## 799            2          0           5           0           3         2
## 801            1          2           1           2           3         1
## 802            4          0           5           4           3         0
## 829            4          0           0           0           3         0
## 832            3          1           2           5           2         2
## 843            4          0           1           4           2         1
## 861            2          1           1           5           3         0
## 865            1          1           5           2           1         1
## 872            1          1           1           3           1         1
## 893            2          0           1           2           4         1
## 912            4          0           1           4           3         1
## 916            3          0           2           2           1         2
## 981            3          0           3           3           4         1
## 1013           2          0           1           3           3         1
## 1017           3          0           1           3           4         1
## 1033           2          0          11           2           4         1
## 1037           3          1           7           2           1         2
## 1040           4          0           2           0           3         2
## 1061           3          0           4           2           2         0
## 1069           3          0           8           2           2         0
## 1111           4          1           1           2           3         1
## 1137           3          3           1           3           3         1
## 1154           3          0           0           2           4         0
## 1168           4          2           7           2           2         2
## 1206           1          0           1           2           3         1
## 1223           3          1           1           2           3         1
## 1237           2          3          16           3           3         2
## 1238           2          0          10           2           3         0
## 1250           3          0           2           3           3         2
## 1256           3          0          11           2           2         0
## 1258           1          3          10           2           3         1
## 1263           4          1           6           4           3         1
## 1272           2          0           1           3           3         1
## 1274           3          0           1           6           3         1
## 1313           3          0           2           4           3         1
## 1314           4          3           4           3           3         2
## 1327           3          0           7           3           2         2
## 1333           2          0           1           3           2         1
## 1339           3          0           1           3           2         1
## 1340           1          0           1           2           3         1
## 1366           4          0           1           3           3         1
## 1370           4          0           6           0           3         2
## 1380           1          0           1           2           3         1
## 1397           2          0          15           2           2         2
## 1439           1          1           1           3           2         1
## 1443           2          3           4           3           4         2
##      YrsCurrRole YrsSncPromo YrsWtCurMgr  LifeSat
## 3              0           0           0 2.666667
## 34             0           0           0 3.666667
## 35             0           2           0 2.333333
## 43             0           0           1 2.666667
## 51             0           0           0 3.333333
## 52             2           2           2 2.666667
## 70             0           0           0 2.000000
## 103            0           1           0 3.000000
## 128            0           0           0 3.000000
## 133            2           2           2 3.000000
## 172            0           0           0 2.000000
## 178            0           1           0 3.000000
## 193            0           0           1 2.333333
## 205            0           0           0 2.000000
## 251            0           0           0 2.666667
## 265            0           0           0 2.333333
## 287            0           0           0 2.666667
## 297            0           0           0 3.000000
## 328            0           0           0 2.000000
## 364            0           0           0 2.666667
## 371            0           0           0 3.000000
## 386            0           0           0 3.333333
## 406            2           0           2 2.333333
## 416            2           1           0 3.000000
## 422            2           1           1 2.666667
## 423            0           0           0 3.666667
## 440            0           0           0 2.333333
## 444            2           1           2 3.000000
## 458            0           0           0 3.000000
## 464            0           0           0 2.333333
## 481            0           0           0 2.666667
## 505            1           0           0 2.666667
## 514            0           1           1 3.333333
## 526            2           2           0 2.333333
## 548            2           2           2 3.333333
## 586            0           0           0 2.333333
## 590            0           0           0 2.666667
## 599            2           2           1 3.000000
## 657            0           0           1 2.666667
## 661            0           0           0 3.333333
## 663            2           0           2 3.000000
## 664            0           0           0 2.333333
## 670            2           2           2 2.000000
## 684            0           0           0 2.666667
## 689            0           0           0 2.666667
## 690            0           0           0 2.000000
## 701            0           0           0 2.333333
## 707            0           0           0 2.666667
## 712            0           0           0 2.333333
## 732            0           0           0 1.666667
## 745            0           0           0 1.333333
## 749            2           2           2 2.666667
## 762            0           0           0 2.333333
## 777            2           0           2 3.000000
## 778            0           1           0 1.333333
## 798            1           0           0 2.666667
## 799            2           2           2 2.333333
## 801            0           0           0 2.000000
## 802            0           0           0 3.333333
## 829            0           0           0 3.333333
## 832            2           2           2 2.666667
## 843            1           0           0 3.333333
## 861            0           0           0 3.000000
## 865            0           0           0 1.000000
## 872            0           0           0 1.333333
## 893            1           0           0 2.666667
## 912            0           1           0 3.666667
## 916            2           2           2 2.333333
## 981            0           0           0 3.666667
## 1013           0           0           0 2.666667
## 1017           0           0           0 3.000000
## 1033           0           0           0 2.333333
## 1037           2           2           2 2.666667
## 1040           2           2           2 3.333333
## 1061           0           0           0 2.000000
## 1069           0           0           0 2.000000
## 1111           0           0           0 2.666667
## 1137           1           0           0 2.666667
## 1154           0           0           0 3.666667
## 1168           2           2           2 3.333333
## 1206           0           0           0 2.000000
## 1223           0           0           0 3.000000
## 1237           2           2           2 2.000000
## 1238           0           0           0 2.333333
## 1250           2           2           2 2.666667
## 1256           0           0           0 2.000000
## 1258           0           0           0 2.333333
## 1263           0           0           0 3.333333
## 1272           0           1           0 2.333333
## 1274           0           0           0 2.333333
## 1313           0           0           0 2.333333
## 1314           2           2           0 2.666667
## 1327           2           2           2 2.333333
## 1333           0           1           0 2.666667
## 1339           0           0           0 3.000000
## 1340           0           0           0 2.000000
## 1366           0           0           0 2.666667
## 1370           0           2           2 3.333333
## 1380           0           0           0 2.000000
## 1397           2           2           2 1.666667
## 1439           0           1           0 1.333333
## 1443           2           2           2 3.333333

## List of 5
##  $ axis.title.x:List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : num 10
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.y: list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.text.x :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : num 8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.y :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : num 8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.ticks.y: list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

Conclusion

Top factors that contribute to turnover:

In our analysis, we found that the following factors contributed most to attrition:

  1. Overtime
    • Employees who worked overtime were more likely to leave
  2. Job Level
    • Employees at the lower job levels (1, 2) were the ones most likely to leave
  3. Stock Option Level
    • Those employees with stock options at level 0,1 contributed to attrition
  4. Job Satisfaction
    • Employees who rated their job satisfaction at level 0, 1 were the most likely to leave
  5. Age
    • The younger aged employees fit the profile of those employees leaving
  6. Monthly Income
    • Employees that make a lower income are contributing to attrition

Job-specific trends.

The job-specific trends that surfaced in our analysis include:

  1. Sales Representatives, Laboratory Technicians, and Human Resources had the largest attrition percentages
  2. Sales Representatives are some of the youngest employees.
  3. Sales Representatives also have the lowest total working years, years in current role, years at current company, and years with current manager

Other considerations

Insights

  1. One component that we discovered during our analysis was that overtime was quite high among the employees who left.

  2. Employees at the lower job levels (1, 2) were the ones most likely to leave

  3. Those employees with stock options at level (0, 1) contributed to attrition

  4. Employees who rated their job satisfaction at level 0, 1 were the most likely to leave

  5. The younger aged employees fit the profile of those employees leaving

  6. Employees that make a lower income are contributing to attrition

Recommendations

  1. Our recommendation is to reduce the amount of overtime and to offer other incentives for having to work overtime, such as extended holiday time or gift cards for a family dinner

  2. In relation to job level, the company still needs to have entry-level positions. However, create career tracks for entry-level positions such as lab technicians. There needs to be a career development path so that new and existing employees can see their long-term potential in the company

  3. For stock options, our recommendation is to invest in your employees by offering higher option levels. One approach that has been successful in the past is to offer stock options to employees with a 4-year vesting period and yearly refresh grants based on performance. This gives employees a vested interest, because they see directly that the success of the business has a positive financial impact for them as well

  4. Our recommendation to increase job satisfaction is to become a more inclusive company by offering a workplace that supports creative development. Folks should want to come to work. Consider offering meal compensation and, for employees with long commutes, toll tag incentives.

  5. Fresh, new talent is the future and lifeblood of your company. We recommend starting young employees off in other parts of the business so they can grow, learn, and feel they are a part of the bigger picture; introducing them to the wider company will reinforce the idea of inclusion and make them feel they are part of something

  6. The low pay in young, new talent is causing attrition. Our recommendation is to increase the starting salary of employees. In the long run this will cost you less as you are not having to train and ramp new talent up and then have them leave the company

Improvements

  1. Provide concrete definition for Life Satisfaction so that it can be included in future analysis
  2. Define the Stock Option levels in more detail, including what each level means